robust.se <- function(model, cluster = NULL){
  #Adapted from Drew Dimmery's website: http://drewdimmery.com/robust-ses-in-r/
  if(is.null(cluster) == TRUE){
    cluster <- 1:nrow(model$model)
  }
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  estfun_out <- data.frame(sandwich::estfun(model))
  estfun_out$cluster <- cluster
  uj <- dplyr::summarise_each(dplyr::group_by(estfun_out, cluster), dplyr::funs(sum))
  uj$cluster <- NULL
  uj <- as.matrix(uj)
  rcse.cov <- dfc * sandwich::sandwich(model, meat = crossprod(uj)/N)
  rcse.se <- lmtest::coeftest(model, rcse.cov)
  return(list(rcse.cov, rcse.se))
}

gen_wildbs <- function(null_models, full_models, cluster_list, clusters, coef_list, permute = TRUE){
  #Wild Parametric Bootstrap. The null and full models are
  #equal-sized lists of formulas that specify the models to be
  #tested. The argument 'varnames' specifies the coefficients for
  #which a p-value needs to be generated in the full model (only
  #one per model)
  

  resids_null <- lapply(null_models, function(x) x$residuals)
  fitted_null <- lapply(null_models, function(x) x$fitted.values)
  
  if(permute == TRUE){
    wild_perm <-  dplyr::summarise(dplyr::group_by(cluster_list, cluster), wild_perm = sample(c(1,-1), 1))
  }
  
  if(permute == FALSE){
    wild_perm <-  dplyr::summarise(dplyr::group_by(cluster_list, cluster), wild_perm = 1)
  }
  
  
  bs_data <- lapply(full_models, function(x) data.frame(x$response, x$X, x$fe, x$clustervar))
  # loop over hypotheses to test
  for(i in 1:length(bs_data)){
    bs_data[[i]]$cluster <- clusters[[i]]
    bs_data[[i]] <- dplyr::left_join((bs_data[[i]]), wild_perm, by = "cluster")
    bs_data[[i]][, 1] <- fitted_null[[i]] + resids_null[[i]] * bs_data[[i]]$wild_perm
  }
  
  # Estimate models on bootstrapped data
  models_bs <- mapply(full_models, bs_data,
                      FUN = function(x, y) felm(formula = x$formula[[1]], data = y, keepX = TRUE), SIMPLIFY = FALSE) 
  for(i in 1:length(models_bs)){
    models_bs[[i]]$cluster <- bs_data[[i]]$cluster
  }
  #Estimate cluster robust VCOV and pvals
  mapply(models_bs, coef_list, FUN = function(x, y) x$ctval[y])
}

gen_pairsbs <- function(null_models, full_models, cluster_list, clusters, coef_list, permute = TRUE){
  #Pairs Non-Parametric Bootstrap. The null and full models are
  #equal-sized lists of formulas that specify the models to be
  #tested. The argument 'varnames' specifies the coefficients for
  #which a p-value needs to be generated in the full model (only
  #one per model)
  
  if(permute == TRUE){
    bs_clusters <-  unique(cluster_list)[sample(1:nrow(unique(cluster_list)), nrow(unique(cluster_list)), replace = TRUE), ,drop = FALSE]
    bs_clusters$cluster <- as.character(bs_clusters$cluster)
    bs_clusters$bs_cluster <- as.character(1:nrow(bs_clusters))
  }
  
  if(permute == FALSE){
    bs_clusters <- unique(cluster_list)
    bs_clusters$cluster <- as.character(bs_clusters$cluster)
    bs_clusters$bs_cluster <- as.character(1:nrow(bs_clusters))
  }
  
  
  bs_data <- lapply(full_models, model.frame)
  # loop over hypotheses to test
  for(i in 1:length(bs_data)){
    bs_data[[i]]$cluster <- as.character(clusters[[i]])
    bs_data[[i]] <- dplyr::left_join(bs_clusters, bs_data[[i]], by = "cluster")
  }
  
  # Estimate models on bootstrapped data
  models_bs <- mapply(full_models, bs_data,
                      FUN = function(x, y) lm(formula = formula(x), data = y), SIMPLIFY = FALSE)
  for(i in 1:length(models_bs)){
    models_bs[[i]]$cluster <- bs_data[[i]]$bs_cluster
  }
  #Estimate cluster robust VCOV and pvals
  bs_vcovs <- lapply(models_bs,
                     function(x) robust.se(x , x$cluster))
  full_vcovs <- lapply(full_models,
                       function(x) robust.se(x , x$cluster))
  if(permute == FALSE){
    return(mapply(bs_vcovs, coef_list, FUN = function(x, y) x[[2]][y ,'t value']))
  }
  if(permute == TRUE){
    return((mapply(bs_vcovs, coef_list, FUN = function(x, y) x[[2]][y ,'Estimate']) - mapply(full_vcovs, coef_list, FUN = function(x, y) x[[2]][y ,'Estimate'])) / mapply(bs_vcovs, coef_list, FUN = function(x, y) x[[2]][y ,'Std. Error']))
  }
  
}

free_stepdown <- function(tstats, rep_tstats){
  tstat_order <- rank(abs(tstats), ties.method = "random")
  bs_qvals <- matrix(ncol = ncol(rep_tstats), nrow = nrow(rep_tstats))
  for(i in 1:ncol(bs_qvals)){
    for(j in 1:length(tstats)){
      if(j == 1){
        bs_qvals[which(tstat_order == j), i] <- abs(rep_tstats[which(tstat_order == 1), i][[1]])
      }
      else{
        bs_qvals[which(tstat_order == j), i] <-  max(abs(rep_tstats[which(tstat_order == j), i][[1]]),
                                                     abs(rep_tstats[which(tstat_order == j-1), i][[1]]))
      }
    }
  }
  
  bs_pvals_adjusted <- rep(NA, length(tstats))
  for(i in 1:length(tstats)){
    bs_pvals_adjusted[i] <- (sum(bs_qvals[i, ] >= abs(tstats[i])) + 1) / (ncol(rep_tstats) + 1)
  }
  
  #Enforce Montonicty
  for(j in 0:(length(bs_pvals_adjusted)-1)){
    if(j == 0){
      bs_pvals_adjusted[which(tstat_order == length(tstats))] <-  bs_pvals_adjusted[which(tstat_order == length(tstats))]
    }
    else{
      bs_pvals_adjusted[which(tstat_order == length(tstats)-j)] <-  max(bs_pvals_adjusted[which(tstat_order == length(tstats)-j)],
                                                                        bs_pvals_adjusted[which(tstat_order == length(tstats) - (j-1))])
    }
  }
  bs_pvals_adjusted
}

boot_stepdown <- function(null_formulas, full_formulas, data, coef_list, subsets = NULL, cluster = NULL,
                          nboots = 10000, boot_type = "wild", parallel = TRUE){
  
  if(is.null(cluster) == TRUE){
    #If data is not clustered, each obsrevation gets a unique cluster id
    data$cluster <- as.character(1:nrow(data))
  }
  
  if(is.null(cluster) == FALSE){
    data$cluster <- as.character(data[, cluster])
  }
  
  #Create datasets for each pair of null and full models
  if(is.null(subsets) == FALSE){
    datasets <- lapply(subsets, function(x) subset(data, eval(parse(text = x))))
  }
  if(is.null(subsets) == TRUE){
    datasets <- vector('list', length(null_formulas))
    for(i in 1:length(datasets)){
      datasets[[i]] <- data
    }
  }
  
  # Only keep variables uses to test hypotheses
  datasets <-  mapply(null_formulas, full_formulas, datasets, FUN = function(x,y,z)
    na.omit(z[, unique(c(all.vars(x), all.vars(y), "cluster"))]), SIMPLIFY = FALSE)
  
  clusters <- lapply(datasets, function(x) x$cluster)
  

  full_models <- mapply(full_formulas, datasets, FUN = function(x, y)
    felm(x, data = y, keepX = TRUE), SIMPLIFY = FALSE)
  for(i in 1:length(full_models)){
    full_models[[i]]$formula <- full_formulas[i]
  }
  

  null_models <- mapply(null_formulas, datasets, FUN = function(x, y)
    felm(x, data = y, keepX = TRUE), SIMPLIFY = FALSE)
  
  boot_cluster <- data.frame(cluster = as.character(data$cluster),
                             stringsAsFactors = FALSE)
  
  for(i in 1:length(full_models)){
    full_models[[i]]$cluster <- as.character(datasets[[i]]$cluster)
  }
  
  tstats <- gen_wildbs(null_models = null_models, full_models = full_models,
                       cluster_list = boot_cluster, clusters = clusters, coef_list = coef_list, permute = FALSE)
  if(parallel == FALSE){
    rep_tstats <- matrix(nrow = length(tstats), ncol = nboots)
    progress_bar <- txtProgressBar(1, nboots)
    for(i in 1:nboots){
      rep_tstats[,i] <- gen_wildbs(null_models = null_models,
                                   full_models = full_models,
                                   cluster_list = boot_cluster,
                                   clusters = clusters,
                                   coef_list = coef_list)
      setTxtProgressBar(progress_bar, i)
    }
  }
  if(parallel == TRUE){
    rep_tstats <-  foreach::foreach(i=1:nboots, .combine = 'cbind',
                                    .export = c("gen_wildbs"),
                                    .packages = c('sandwich', 'lmtest', 'lfe')) %dopar%{
                                      gen_wildbs(null_models = null_models,
                                                 full_models = full_models,
                                                 clusters = clusters,
                                                 cluster_list = boot_cluster,
                                                 coef_list = coef_list)
                                    }
  }
  
  #Generate bootstrap unadjusted p-values
  bs_pvals <- rep(NA, length(tstats))
  for(i in 1:length(tstats)){
    bs_pvals[i] <- (sum(abs(rep_tstats[i, ]) >= abs(tstats[i])) + 1) / (nboots + 1)
  }
  bs_pvalues_adjusted <- free_stepdown(tstats = tstats, rep_tstats = rep_tstats)
  
  data.frame(Hypothesis = paste0("Hypothesis ", 1:length(full_formulas)),
             Variable = coef_list,
             bs_pvalues_unadjusted = bs_pvals,
             bs_pvalues_adjusted = bs_pvalues_adjusted)
}
